Introduction

This analysis builds on our previous work on status measurement and clustering. We will focus on three main objectives:

  1. Validation - Cross-validating our status measures with external indicators and performing case studies
  2. Advanced Visualization - Creating interactive and network visualizations to better explore status dimensions
  3. Statistical Testing - Formally testing relationships between different status dimensions

1. Data Loading and Preparation

First, we’ll load our integrated status dataset and any external datasets for validation:

# Load the integrated status dimensions data
integrated_status <- read.csv("integrated_status_dimensions.csv", stringsAsFactors = FALSE)

# Load external validation datasets if available
# For example: CINC scores, GDP data, etc.
# Example (uncomment and modify as needed):
# external_data <- read.csv("external_status_measures.csv", stringsAsFactors = FALSE)

# Preview the data
glimpse(integrated_status)
## Rows: 1,505
## Columns: 29
## $ country                      <chr> "Czechoslovakia", "Egypt", "France", "Ind…
## $ recognition_count            <int> 39, 58, 82, 55, 33, 36, 73, 58, 32, 23, 4…
## $ weighted_recognition         <dbl> 49.625, 60.250, 75.500, 43.750, 31.750, 3…
## $ eigenvector_centrality       <dbl> 0.52822030, 0.71045720, 1.00000000, 0.660…
## $ pagerank                     <dbl> 0.012528771, 0.019578976, 0.037892397, 0.…
## $ authority                    <dbl> 0.54672243, 0.72487574, 1.00000000, 0.665…
## $ betweenness                  <dbl> 0.0244847825, 0.0001410069, 0.0260780360,…
## $ recognition_rate             <dbl> 0.39393939, 0.58585859, 0.82828283, 0.555…
## $ network_inconsistency        <dbl> 0.2902262, 0.8616718, 1.2127113, 0.540182…
## $ outgoing_ties                <int> 59, 61, 83, 44, 34, 32, 78, 60, 44, 25, 5…
## $ recognition_balance          <int> -20, -3, -1, 11, -1, 4, -5, -2, -12, -2, …
## $ recognition_ratio            <dbl> 0.6610169, 0.9508197, 0.9879518, 1.250000…
## $ recognition_status_pca       <dbl> 0.5300235, 0.7343870, 1.0000000, 0.641582…
## $ prestige_status_pca          <dbl> 0.43957121, 0.62576816, 1.00000000, 0.604…
## $ brokerage_status_pca         <dbl> 0.38788572, 0.32058496, 0.63613453, 0.604…
## $ overall_status_network_pca   <dbl> 0.5032924, 0.6427105, 1.0000000, 0.656207…
## $ year                         <int> 1960, 1960, 1960, 1960, 1960, 1960, 1960,…
## $ external_internal_ratio      <dbl> 0.6610169, 0.9508197, 0.9879518, 1.250000…
## $ attribute_status_pca         <dbl> -1.47260670, -1.59817226, 2.71194106, 0.2…
## $ attribute_status_pca_z       <dbl> -0.642036308, -0.696781170, 1.182369075, …
## $ overall_status_network_pca_z <dbl> 0.98399426, 1.66919367, 3.42516686, 1.735…
## $ combined_status_score        <dbl> 0.17097897, 0.48620625, 2.30376797, 0.926…
## $ attribute_status_quartile    <int> 2, 2, 4, 4, 2, 2, 4, 4, 2, 1, 4, 4, 4, 1,…
## $ network_status_quartile      <int> 4, 4, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 4, 3,…
## $ combined_status_quartile     <int> 3, 4, 4, 4, 3, 3, 4, 4, 3, 2, 4, 4, 4, 2,…
## $ status_type                  <chr> "Upper-Middle Status", "High Status", "Hi…
## $ status_inconsistency         <dbl> 1.6260306, 2.3659748, 2.2427978, 1.617973…
## $ cluster                      <int> 3, 3, 4, 3, 3, 3, 4, 4, 3, 1, 3, 4, 4, 1,…
## $ hc_cluster                   <int> 1, 2, 3, 2, 1, 1, 2, 2, 1, 4, 1, 3, 3, 4,…

2. Validation Analysis

2.1 Cross-validation with External Status Measures

We’ll compare our status measures with established external indicators to assess validity:

# If you have external data, merge it with the integrated_status data
# Example: 
# validation_data <- integrated_status %>%
#   left_join(external_data, by = c("country", "year"))

# For this example, we'll use CINC as an external measure (if available in your data)
# If CINC is not available in your dataset, replace with another relevant measure

# Check if CINC is available in the data
if("cinc" %in% colnames(integrated_status)) {
  # Correlation analysis
  cor_data <- integrated_status %>%
    select(combined_status_score, attribute_status_pca, overall_status_network_pca, cinc) %>%
    na.omit()
  
  # Correlation matrix
  cor_matrix <- cor(cor_data)
  
  # Visualize correlation matrix
  corrplot(cor_matrix, method = "color", type = "upper", 
           addCoef.col = "black", tl.col = "black", tl.srt = 45,
           title = "Correlation between Status Measures and CINC")
  
  # Scatter plots
  p1 <- ggplot(cor_data, aes(x = cinc, y = combined_status_score)) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", se = TRUE) +
    labs(x = "CINC Score", y = "Combined Status Score",
         title = "Combined Status vs. CINC") +
    theme_minimal()
  
  p2 <- ggplot(cor_data, aes(x = cinc, y = attribute_status_pca)) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", se = TRUE) +
    labs(x = "CINC Score", y = "Attribute Status",
         title = "Attribute Status vs. CINC") +
    theme_minimal()
  
  p3 <- ggplot(cor_data, aes(x = cinc, y = overall_status_network_pca)) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", se = TRUE) +
    labs(x = "CINC Score", y = "Network Status",
         title = "Network Status vs. CINC") +
    theme_minimal()
  
  grid.arrange(p1, p2, p3, ncol = 3)
} else {
  # If CINC is not available, provide instructions for modification
  cat("CINC data not found in the dataset. Please modify this code to use available external measures for validation.")
  
  # Alternative approach - create a sample external measure for demonstration
  # This is just for illustration - replace with actual external data
  integrated_status <- integrated_status %>%
    mutate(
      sample_external_measure = scale(attribute_status_pca + rnorm(n(), sd = 0.5))[,1]
    )
  
  # Correlation with sample external measure
  cor_data <- integrated_status %>%
    select(combined_status_score, attribute_status_pca, overall_status_network_pca, sample_external_measure) %>%
    na.omit()
  
  # Correlation matrix
  cor_matrix <- cor(cor_data)
  
  # Visualize correlation matrix
  corrplot(cor_matrix, method = "color", type = "upper", 
           addCoef.col = "black", tl.col = "black", tl.srt = 45,
           title = "Correlation between Status Measures and Sample External Measure")
}
## CINC data not found in the dataset. Please modify this code to use available external measures for validation.

2.2 Case Studies of Specific Countries

We’ll select a few countries with interesting status trajectories for in-depth case studies:

# Identify countries with interesting status profiles or changes
# 1. Countries with large status changes over time
status_change <- integrated_status %>%
  select(country, year, combined_status_score) %>%
  arrange(country, year) %>%
  group_by(country) %>%
  mutate(
    next_score = lead(combined_status_score),
    score_change = next_score - combined_status_score
  ) %>%
  filter(!is.na(score_change)) %>%
  ungroup()

# Top 5 countries with largest positive change
top_risers <- status_change %>%
  group_by(country) %>%
  summarise(total_change = sum(score_change, na.rm = TRUE)) %>%
  arrange(desc(total_change)) %>%
  head(5)

# Top 5 countries with largest negative change
top_decliners <- status_change %>%
  group_by(country) %>%
  summarise(total_change = sum(score_change, na.rm = TRUE)) %>%
  arrange(total_change) %>%
  head(5)

# 2. Countries with status inconsistency (high attribute, low network or vice versa)
status_inconsistency <- integrated_status %>%
  group_by(country) %>%
  summarise(
    mean_inconsistency = mean(status_inconsistency, na.rm = TRUE),
    n_obs = n()
  ) %>%
  filter(n_obs >= 3) %>%  # At least 3 observations
  arrange(desc(mean_inconsistency)) %>%
  head(5)

# Display the interesting countries
interesting_countries <- c(
  top_risers$country,
  top_decliners$country,
  status_inconsistency$country
)

# Create a data frame with just these countries
case_study_data <- integrated_status %>%
  filter(country %in% interesting_countries)

# Plot the status trajectories of case study countries
ggplot(case_study_data, aes(x = year, y = combined_status_score, color = country, group = country)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(x = "Year", y = "Combined Status Score",
       title = "Status Trajectories of Case Study Countries") +
  theme_minimal() +
  theme(legend.position = "right")

# Create individual plots for top status risers
for(country_name in top_risers$country[1:3]) {  # First 3 top risers
  country_data <- integrated_status %>%
    filter(country == country_name)
  
  p <- ggplot(country_data, aes(x = year)) +
    geom_line(aes(y = attribute_status_pca, color = "Attribute Status"), size = 1) +
    geom_line(aes(y = overall_status_network_pca, color = "Network Status"), size = 1) +
    geom_line(aes(y = combined_status_score, color = "Combined Status"), size = 1, linetype = "dashed") +
    labs(x = "Year", y = "Status Scores", color = "Status Dimension",
         title = paste("Status Trajectory for", country_name)) +
    theme_minimal()
  
  print(p)
}

# Create individual plots for countries with status inconsistency
for(country_name in status_inconsistency$country[1:3]) {  # First 3 inconsistent countries
  country_data <- integrated_status %>%
    filter(country == country_name)
  
  p <- ggplot(country_data, aes(x = year)) +
    geom_line(aes(y = attribute_status_pca, color = "Attribute Status"), size = 1) +
    geom_line(aes(y = overall_status_network_pca, color = "Network Status"), size = 1) +
    geom_line(aes(y = status_inconsistency, color = "Status Inconsistency"), size = 1, linetype = "dotted") +
    labs(x = "Year", y = "Status Scores", color = "Status Dimension",
         title = paste("Status Inconsistency for", country_name)) +
    theme_minimal()
  
  print(p)
}

3. Advanced Visualization

3.1 Interactive Visualizations

Create interactive visualizations to explore status dimensions:

if(!"ir_category" %in% colnames(integrated_status)) {
  # Create IR categories based on quartiles of combined status score
  integrated_status <- integrated_status %>%
    group_by(year) %>%
    mutate(
      status_quartile = ntile(combined_status_score, 4),
      ir_category = case_when(
        status_quartile == 4 ~ "Great Power",
        status_quartile == 3 ~ "Major Power",
        status_quartile == 2 ~ "Middle Power",
        status_quartile == 1 ~ "Minor Power"
      )
    ) %>%
    ungroup()
}

# Create interactive scatter plot with plotly
interactive_plot <- integrated_status %>%
  filter(year == max(year)) %>%  # Most recent year
  plot_ly(
    x = ~attribute_status_pca,
    y = ~overall_status_network_pca,
    color = ~ir_category,
    size = ~status_inconsistency,
    text = ~paste("Country:", country, 
                 "<br>Attribute Status:", round(attribute_status_pca, 2),
                 "<br>Network Status:", round(overall_status_network_pca, 2),
                 "<br>Combined Score:", round(combined_status_score, 2),
                 "<br>IR Category:", ir_category),
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = paste("Interactive Status Space -", max(integrated_status$year)),
    xaxis = list(title = "Attribute-Based Status"),
    yaxis = list(title = "Network-Based Status"),
    hovermode = "closest"
  )

# Display the interactive plot
interactive_plot
# Create a 3D visualization using t-SNE for dimensionality reduction
# This allows visualizing multiple status dimensions simultaneously
if(length(unique(integrated_status$year)) >= 3) {
  # Select a subset of years for visualization
  years_to_viz <- sort(unique(integrated_status$year), decreasing = TRUE)[1:3]
  
  tsne_data <- integrated_status %>%
    filter(year %in% years_to_viz) %>%
    select(country, year, attribute_status_pca, overall_status_network_pca, 
           combined_status_score, status_inconsistency, ir_category)
  
  # Run t-SNE on the status variables
  set.seed(123)  # For reproducibility
  tsne_result <- tsne(tsne_data %>% 
                     select(attribute_status_pca, overall_status_network_pca, 
                            combined_status_score, status_inconsistency) %>%
                     as.matrix())
  
  # Add t-SNE coordinates to the data
  tsne_data$tsne1 <- tsne_result[,1]
  tsne_data$tsne2 <- tsne_result[,2]
  
  # Create interactive t-SNE plot
  tsne_plot <- plot_ly(
    data = tsne_data,
    x = ~tsne1,
    y = ~tsne2,
    color = ~ir_category,
    symbol = ~factor(year),
    text = ~paste("Country:", country, 
                 "<br>Year:", year,
                 "<br>IR Category:", ir_category,
                 "<br>Combined Score:", round(combined_status_score, 2)),
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "t-SNE Visualization of Status Dimensions",
    xaxis = list(title = "t-SNE Dimension 1"),
    yaxis = list(title = "t-SNE Dimension 2"),
    hovermode = "closest"
  )
  
  # Display the t-SNE plot
  tsne_plot
}
# Create interactive time series visualization of status evolution
# Select a few prominent countries for visualization
prominent_countries <- c(
  "United States", "USSR", "Russia", "China", 
  "United Kingdom", "France", "Germany", "Japan", 
  "India", "Brazil"
)

# Filter for countries that exist in our dataset
available_countries <- prominent_countries[prominent_countries %in% unique(integrated_status$country)]

if(length(available_countries) > 0) {
  time_series_data <- integrated_status %>%
    filter(country %in% available_countries)
  
  # Create interactive time series plot
  time_series_plot <- plot_ly() %>%
    layout(
      title = "Status Evolution of Major Powers",
      xaxis = list(title = "Year"),
      yaxis = list(title = "Combined Status Score"),
      hovermode = "closest"
    )
  
  # Add a line for each country
  for(country_name in available_countries) {
    country_data <- time_series_data %>%
      filter(country == country_name)
    
    time_series_plot <- time_series_plot %>%
      add_trace(
        data = country_data,
        x = ~year,
        y = ~combined_status_score,
        name = country_name,
        type = "scatter",
        mode = "lines+markers",
        text = ~paste("Country:", country, 
                     "<br>Year:", year,
                     "<br>Combined Score:", round(combined_status_score, 2),
                     "<br>IR Category:", ir_category)
      )
  }
  
  # Display the time series plot
  time_series_plot
}

4. Statistical Testing

4.1 Relationship Between Attribute and Network Status

Test the relationship between attribute-based and network-based status measures:

# Linear regression of network status on attribute status
lm_model <- lm(overall_status_network_pca ~ attribute_status_pca, data = integrated_status)

# Display regression summary
summary(lm_model)
## 
## Call:
## lm(formula = overall_status_network_pca ~ attribute_status_pca, 
##     data = integrated_status)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54601 -0.11676 -0.01139  0.09902  0.53879 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.303078   0.003954   76.65   <2e-16 ***
## attribute_status_pca 0.058309   0.001724   33.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1534 on 1503 degrees of freedom
## Multiple R-squared:  0.432,  Adjusted R-squared:  0.4317 
## F-statistic:  1143 on 1 and 1503 DF,  p-value: < 2.2e-16
# Plot the relationship with regression line
ggplot(integrated_status, aes(x = attribute_status_pca, y = overall_status_network_pca)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "Attribute-Based Status", y = "Network-Based Status",
       title = "Relationship Between Attribute and Network Status") +
  theme_minimal()

# Check for non-linear relationships using a GAM
if(require(mgcv)) {
  gam_model <- gam(overall_status_network_pca ~ s(attribute_status_pca), data = integrated_status)
  
  # Summary of GAM model
  summary(gam_model)
  
  # Plot the GAM fit
  ggplot(integrated_status, aes(x = attribute_status_pca, y = overall_status_network_pca)) +
    geom_point(alpha = 0.5) +
    geom_smooth(method = "gam", formula = y ~ s(x), se = TRUE) +
    labs(x = "Attribute-Based Status", y = "Network-Based Status",
         title = "Non-linear Relationship Between Attribute and Network Status") +
    theme_minimal()
  
  # Compare linear and non-linear models
  AIC(lm_model, gam_model)
}
##                 df       AIC
## lm_model   3.00000 -1367.998
## gam_model 10.04312 -1486.076
# Test for relationship by year to see if it changes over time
# We'll run separate regressions for each year and compare coefficients
year_models <- integrated_status %>%
  nest_by(year) %>%
  mutate(
    model = list(lm(overall_status_network_pca ~ attribute_status_pca, data = data)),
    tidied = list(tidy(model))
  ) %>%
  unnest(tidied) %>%
  filter(term == "attribute_status_pca") %>%
  select(year, estimate, std.error, p.value)

# Display coefficient changes over time
ggplot(year_models, aes(x = year, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - 1.96*std.error, 
                    ymax = estimate + 1.96*std.error), width = 0.2) +
  geom_line() +
  labs(x = "Year", y = "Coefficient Estimate",
       title = "Relationship Between Attribute and Network Status Over Time") +
  theme_minimal()

4.2 ANOVA and Group Comparisons

Conduct ANOVA to compare status across different groups:

# ANOVA to compare attribute status across IR categories
anova_attribute <- aov(attribute_status_pca ~ ir_category, data = integrated_status)
summary(anova_attribute)
##               Df Sum Sq Mean Sq F value Pr(>F)    
## ir_category    3   3633  1211.0   424.7 <2e-16 ***
## Residuals   1501   4279     2.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA to compare network status across IR categories
anova_network <- aov(overall_status_network_pca ~ ir_category, data = integrated_status)
summary(anova_network)
##               Df Sum Sq Mean Sq F value Pr(>F)    
## ir_category    3  47.23   15.74    1572 <2e-16 ***
## Residuals   1501  15.04    0.01                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA to compare combined status across IR categories
anova_combined <- aov(combined_status_score ~ ir_category, data = integrated_status)
summary(anova_combined)
##               Df Sum Sq Mean Sq F value Pr(>F)    
## ir_category    3  895.7  298.55    1278 <2e-16 ***
## Residuals   1501  350.6    0.23                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc tests for combined status
if(require(emmeans)) {
  posthoc <- emmeans(anova_combined, pairwise ~ ir_category)
  summary(posthoc)
} else {
  # Alternative using TukeyHSD
  tukey_results <- TukeyHSD(anova_combined)
  print(tukey_results)
}
## $emmeans
##  ir_category   emmean     SE   df lower.CL upper.CL
##  Great Power   1.2075 0.0251 1501   1.1583    1.257
##  Major Power   0.0912 0.0249 1501   0.0423    0.140
##  Middle Power -0.3928 0.0249 1501  -0.4415   -0.344
##  Minor Power  -0.8785 0.0248 1501  -0.9271   -0.830
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                   estimate     SE   df t.ratio p.value
##  Great Power - Major Power     1.116 0.0354 1501  31.562  <.0001
##  Great Power - Middle Power    1.600 0.0353 1501  45.306  <.0001
##  Great Power - Minor Power     2.086 0.0353 1501  59.135  <.0001
##  Major Power - Middle Power    0.484 0.0352 1501  13.748  <.0001
##  Major Power - Minor Power     0.970 0.0352 1501  27.583  <.0001
##  Middle Power - Minor Power    0.486 0.0351 1501  13.835  <.0001
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Visualize status differences across IR categories
ggplot(integrated_status, aes(x = ir_category, y = combined_status_score, fill = ir_category)) +
  geom_boxplot() +
  labs(x = "IR Category", y = "Combined Status Score",
       title = "Status Score Differences Across IR Categories") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Compare status inconsistency across IR categories
anova_inconsistency <- aov(status_inconsistency ~ ir_category, data = integrated_status)
summary(anova_inconsistency)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## ir_category    3   13.6   4.547   18.03 1.71e-11 ***
## Residuals   1501  378.6   0.252                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Visualize status inconsistency across IR categories
ggplot(integrated_status, aes(x = ir_category, y = status_inconsistency, fill = ir_category)) +
  geom_boxplot() +
  labs(x = "IR Category", y = "Status Inconsistency",
       title = "Status Inconsistency Across IR Categories") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4.3 Status Consistency Analysis

Analyze the factors that contribute to status consistency/inconsistency:

# Regression model to predict status inconsistency
consistency_model <- lm(status_inconsistency ~ attribute_status_pca + 
                         overall_status_network_pca + 
                         factor(year), 
                       data = integrated_status)

# Display model summary
summary(consistency_model)
## 
## Call:
## lm(formula = status_inconsistency ~ attribute_status_pca + overall_status_network_pca + 
##     factor(year), data = integrated_status)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1380 -0.3580 -0.0790  0.2812  4.0775 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.782794   0.067599  11.580  < 2e-16 ***
## attribute_status_pca        0.028459   0.009111   3.124 0.001821 ** 
## overall_status_network_pca  0.362602   0.093169   3.892 0.000104 ***
## factor(year)1965           -0.222506   0.070277  -3.166 0.001576 ** 
## factor(year)1970           -0.225982   0.068832  -3.283 0.001050 ** 
## factor(year)1975           -0.249145   0.067786  -3.675 0.000246 ***
## factor(year)1980           -0.257665   0.067171  -3.836 0.000130 ***
## factor(year)1985           -0.356704   0.067819  -5.260 1.65e-07 ***
## factor(year)1990           -0.384170   0.068655  -5.596 2.61e-08 ***
## factor(year)1995           -0.292182   0.068837  -4.245 2.33e-05 ***
## factor(year)2000           -0.273767   0.069796  -3.922 9.17e-05 ***
## factor(year)2005           -0.160104   0.070881  -2.259 0.024041 *  
## factor(year)2010           -0.150788   0.070820  -2.129 0.033404 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4874 on 1492 degrees of freedom
## Multiple R-squared:  0.09641,    Adjusted R-squared:  0.08915 
## F-statistic: 13.27 on 12 and 1492 DF,  p-value: < 2.2e-16
# Visualize status inconsistency by year
ggplot(integrated_status, aes(x = factor(year), y = status_inconsistency)) +
  stat_summary(fun = mean, geom = "bar", fill = "skyblue") +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  labs(x = "Year", y = "Mean Status Inconsistency",
       title = "Status Inconsistency Over Time") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Identify countries with largest inconsistencies over time
top_inconsistent <- integrated_status %>%
  group_by(country) %>%
  summarise(
    mean_inconsistency = mean(status_inconsistency, na.rm = TRUE),
    n_years = n()
  ) %>%
  filter(n_years >= 3) %>%  # At least 3 observations
  arrange(desc(mean_inconsistency)) %>%
  head(10)

# Display top inconsistent countries
top_inconsistent %>%
  kable("html", caption = "Countries with Highest Status Inconsistency") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Countries with Highest Status Inconsistency
country mean_inconsistency n_years
Yugoslavia 2.043225 7
Luxembourg 1.687992 11
Czechoslovakia 1.680845 7
Cuba 1.661572 11
Northern Cyprus, Turkish Republic of 1.631904 6
Egypt 1.593889 11
United States 1.545607 11
China 1.478240 11
German Democratic Republic 1.424568 6
Ethiopia 1.222657 7